Baf53cre Ahr-cko vs ctl,
CR infection,
Intestinal RORgt+ Immune Cells
loading 72,000 for all
cellranger calling 34,620 cells, mean reads 11,163
t0 initial
filt.10x <- Read10X(data.dir = "I:/Shared_win/projects/20230927_10x_SZJ/output_demo/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`
dim(GEX)
## [1] 32285 34620
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAACAGGA-1 AAACCCAAGAATTGCA-1 AAACCCAAGCAGCCCT-1
## Xkr4 . . .
## Gm1992 . . .
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
## AAACCCAAGCCTGAAG-1 AAACCCAAGCGCATCC-1 AAACCCAAGTTGCCCG-1
## Xkr4 . . .
## Gm1992 . . .
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
dim(FB)
## [1] 8 34620
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAACAGGA-1 AAACCCAAGAATTGCA-1 AAACCCAAGCAGCCCT-1
## CKO.1 10 15 12
## CKO.2 1763 37 30
## CKO.3 17 35 17
## CKO.4 10 23 497
## CTL.1 3 8 4
## CTL.2 5 33 21
## CTL.3 9 641 16
## CTL.4 11 15 12
## AAACCCAAGCCTGAAG-1 AAACCCAAGCGCATCC-1 AAACCCAAGTTGCCCG-1
## CKO.1 574 7 5
## CKO.2 23 17 12
## CKO.3 14 19 4
## CKO.4 14 14 545
## CTL.1 3 5 310
## CTL.2 4 612 5
## CTL.3 8 13 6
## CTL.4 6 5 6
rowSums(FB)
## CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2 CTL.3 CTL.4
## 3828390 7470222 4598138 4351086 3110845 4072731 4035048 3556755
rowSums(FB>0)
## CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2 CTL.3 CTL.4
## 34523 34592 34593 34584 34307 34564 34567 34511
# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "ILC3_FB")
FB.seur
## An object of class Seurat
## 8 features across 34620 samples within 1 assay
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "CKO.1" "CKO.2" "CKO.3" "CKO.4" "CTL.1" "CTL.2" "CTL.3" "CTL.4"
perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html
# normalize
FB.seur <- NormalizeData(FB.seur)
#FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features
first define FB colors based on conditions
#scales::show_col(ggsci::pal_igv("default")(49))
#scales::show_col(ggsci::pal_d3("category20b")(20))
#scales::show_col(ggsci::pal_d3("category20c")(20))
#
color.FB <- ggsci::pal_d3("category20c")(20)[c(2,7,12,17,
1,6,11,16
)]
level.FB <- c(rownames(FB.seur))
color.FBraw <- c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 4)
scales::show_col(color.FB, ncol = 4)
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CKO|CTL",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")
plot(table.demux$quantile, pch=16, ylab="mod-quantile")
#plot.new()
plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CKO|CTL",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")
plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
#plot.new()
plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CKO.1 : 85 reads
## Cutoff for CKO.2 : 237 reads
## Cutoff for CKO.3 : 46 reads
## Cutoff for CKO.4 : 121 reads
## Cutoff for CTL.1 : 23 reads
## Cutoff for CTL.2 : 160 reads
## Cutoff for CTL.3 : 86 reads
## Cutoff for CTL.4 : 92 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 9365 296 24959
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## 9365 296 2931 2977 3207 3122 3356 3146
## CTL.3 CTL.4
## 3150 3070
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.992)
## Cutoff for CKO.1 : 89 reads
## Cutoff for CKO.2 : 249 reads
## Cutoff for CKO.3 : 47 reads
## Cutoff for CKO.4 : 126 reads
## Cutoff for CTL.1 : 24 reads
## Cutoff for CTL.2 : 169 reads
## Cutoff for CTL.3 : 90 reads
## Cutoff for CTL.4 : 97 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 9317 315 24988
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## 9317 315 2937 2984 3213 3123 3357 3143
## CTL.3 CTL.4
## 3153 3078
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for CKO.1 : 94 reads
## Cutoff for CKO.2 : 265 reads
## Cutoff for CKO.3 : 49 reads
## Cutoff for CKO.4 : 134 reads
## Cutoff for CTL.1 : 25 reads
## Cutoff for CTL.2 : 181 reads
## Cutoff for CTL.3 : 95 reads
## Cutoff for CTL.4 : 103 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 9236 368 25016
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## 9236 368 2946 2987 3221 3128 3359 3129
## CTL.3 CTL.4
## 3162 3084
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for CKO.1 : 102 reads
## Cutoff for CKO.2 : 287 reads
## Cutoff for CKO.3 : 51 reads
## Cutoff for CKO.4 : 145 reads
## Cutoff for CTL.1 : 27 reads
## Cutoff for CTL.2 : 197 reads
## Cutoff for CTL.3 : 102 reads
## Cutoff for CTL.4 : 112 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 9155 459 25006
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## 9155 459 2944 2988 3229 3134 3344 3106
## CTL.3 CTL.4
## 3166 3095
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for CKO.1 : 115 reads
## Cutoff for CKO.2 : 325 reads
## Cutoff for CKO.3 : 55 reads
## Cutoff for CKO.4 : 163 reads
## Cutoff for CTL.1 : 29 reads
## Cutoff for CTL.2 : 225 reads
## Cutoff for CTL.3 : 115 reads
## Cutoff for CTL.4 : 127 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 9014 595 25011
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## 9014 595 2960 2977 3242 3136 3348 3067
## CTL.3 CTL.4
## 3178 3103
## tags q0.99
#cutoff.FB <- c(85,237,46,121,
# 23,160,86,92)
## custom
# ref
# CKO.1 q0.996 102
# CKO.2 q0.99 237
# CKO.3 q0.998 55
# CKO.4 q0.99 121
# CTL.1 q0.994 25
# CTL.2 q0.99 160
# CTL.3 q0.996 102
# CTL.4 q0.996 112
#
cutoff.FB <- c(102,237,55,121,
25,160,102,112)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2 CTL.3 CTL.4
## 102 237 55 121 25 160 102 112
data.frame(cutoff.FB=cutoff.FB)
## cutoff.FB
## CKO.1 102
## CKO.2 237
## CKO.3 55
## CKO.4 121
## CTL.1 25
## CTL.2 160
## CTL.3 102
## CTL.4 112
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
# define decision matrix
dd <- FBobj@assays[['HTO']]@counts
dd <- apply(dd, 2, function(x){
x > FBcutoff
})
x1 <- colSums(dd)
x1[x1>1] <- "Doublet"
x1[x1==0] <- "Negative"
x1[x1==1] <- "Singlet"
x2 <- x1
bc.slt <- names(x1)[x1=="Singlet"]
for(bc in bc.slt){
x2[bc] <- rownames(dd)[dd[,bc]]
}
FBdf <- data.frame(HTO_classification.global=factor(x1,
levels = c("Doublet","Negative","Singlet")),
hash.ID=factor(x2,
levels = c("Doublet","Negative",rownames(dd))))
return(FBdf)
}
df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 34620 2
df.FB[1:10,]
## HTO_classification.global hash.ID
## AAACCCAAGAACAGGA-1 Singlet CKO.2
## AAACCCAAGAATTGCA-1 Singlet CTL.3
## AAACCCAAGCAGCCCT-1 Singlet CKO.4
## AAACCCAAGCCTGAAG-1 Singlet CKO.1
## AAACCCAAGCGCATCC-1 Singlet CTL.2
## AAACCCAAGTTGCCCG-1 Doublet Doublet
## AAACCCACAAGAGTGC-1 Doublet Doublet
## AAACCCACAAGTATCC-1 Doublet Doublet
## AAACCCACAGAAGTGC-1 Singlet CTL.3
## AAACCCACAGCGCTTG-1 Singlet CKO.4
## custom cutoff run this
table(df.FB)
## hash.ID
## HTO_classification.global Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## Doublet 9197 0 0 0 0 0 0 0
## Negative 0 337 0 0 0 0 0 0
## Singlet 0 0 2935 2997 3221 3135 3393 3162
## hash.ID
## HTO_classification.global CTL.3 CTL.4
## Doublet 0 0
## Negative 0 0
## Singlet 3159 3084
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
## orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAACAGGA-1 ILC3_FB 1828 8 1828 8
## AAACCCAAGAATTGCA-1 ILC3_FB 807 8 807 8
## AAACCCAAGCAGCCCT-1 ILC3_FB 609 8 609 8
## AAACCCAAGCCTGAAG-1 ILC3_FB 646 8 646 8
## HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAACAGGA-1 CKO.2 CTL.4 3.211363 CKO.2
## AAACCCAAGAATTGCA-1 CTL.3 CTL.2 2.459500 CTL.3
## AAACCCAAGCAGCCCT-1 CKO.4 CTL.2 2.304885 CKO.4
## AAACCCAAGCCTGAAG-1 CKO.1 CKO.4 3.040913 CKO.1
## HTO_classification.global hash.ID
## AAACCCAAGAACAGGA-1 Singlet CKO.2
## AAACCCAAGAATTGCA-1 Singlet CTL.3
## AAACCCAAGCAGCCCT-1 Singlet CKO.4
## AAACCCAAGCCTGAAG-1 Singlet CKO.1
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
# levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
# levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,28500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1980,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
cols = color.FB)
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort")
# first, remove negative cells from th object
#FB.seur.subset <- FB.seur
# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"
#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)
#saveRDS(FB.seur.subset, "FB0809.seur.subset.rds")
FB.seur.subset <- readRDS("FB0809.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
cols = color.FB) + labs(title = "FB Max_ID"),
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID.sort', label = F,
cols = c("lightgrey",color.FB,"#FF6C91")) +
labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
this is not bad ! with comfortable colors/shapes.
(while for sn10x data, might should be careful to open your eyes~)
FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)
table(FB.seur@meta.data$hash.ID.sort)
##
## Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## 9197 337 2935 2997 3221 3135 3393 3162
## CTL.3 CTL.4
## 3159 3084
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html
GEX.seur <- CreateSeuratObject(counts = GEX,
min.cells = 3,
min.features = 200,
project = "ILC3_GEX")
GEX.seur
## An object of class Seurat
## 16621 features across 34620 samples within 1 assay
## Active assay: RNA (16621 features, 0 variable features)
grep("^mt-",rownames(GEX),value = T)
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)
RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)
# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
##
## Doublet Negative CKO.1 CKO.2 CKO.3 CKO.4 CTL.1 CTL.2
## 9197 337 2935 2997 3221 3135 3393 3162
## CTL.3 CTL.4
## 3159 3084
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat
## 16621 features across 25086 samples within 1 assay
## Active assay: RNA (16621 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur <- subset(GEX.seur, subset = percent.mt < 7.5 & nFeature_RNA > 500 & nFeature_RNA < 3000 & nCount_RNA < 10000)
GEX.seur
## An object of class Seurat
## 16621 features across 24449 samples within 1 assay
## Active assay: RNA (16621 features, 0 variable features)
filtered obj
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,4500),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+325,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))
GEX.seur <- CellCycleScoring(GEX.seur,
s.features = s.genes,
g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", cols = color.FB,
ncol = 2, pt.size = 0.0) & geom_jitter(alpha=0.012)
GEX.seur.cc <- GEX.seur
GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)
GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')
some few cycling should exist.
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)
# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2
head(VariableFeatures(GEX.seur), 200)
## [1] "Hist1h1b" "Il17a" "Gzma" "Ccl4"
## [5] "Il22" "Ccl5" "Ccl1" "Cxcl2"
## [9] "Cxcl10" "Areg" "Xcl1" "Cd74"
## [13] "Hist1h2ae" "Penk" "Calca" "Cxcl1"
## [17] "Stmn1" "Ccl20" "Hspa1a" "S100g"
## [21] "Il13" "Il4" "Ube2c" "Il5"
## [25] "Gzmc" "H2-Aa" "Pclaf" "Cxcl3"
## [29] "H2-Eb1" "Tnfrsf4" "Il17f" "S100a6"
## [33] "Gzmb" "Ifi27l2a" "Lgals1" "Egr1"
## [37] "Cxcl9" "Ifng" "Hs3st1" "Il10"
## [41] "Ccl3" "Trdc" "Stmn2" "Sostdc1"
## [45] "Il2" "Top2a" "Mki67" "Serpine1"
## [49] "Pcp4" "Csf2" "Tph1" "H2-Ab1"
## [53] "Defa24" "Hist1h2ap" "Birc5" "Plac8"
## [57] "Hist1h3c" "Lgals3" "Cenpf" "Try5"
## [61] "Hmgb2" "Hspb1" "Jun" "Ctla2a"
## [65] "Hist1h2ab" "Hspa1b" "Ifitm2" "S100a4"
## [69] "Egr2" "Serpinb1a" "Rrm2" "Ccr7"
## [73] "Ly6a" "Ifit1" "Cd36" "Ly6d"
## [77] "Ifitm1" "Ctla4" "Akap12" "Actb"
## [81] "Crip1" "Bambi" "Rrad" "Tmsb4x"
## [85] "Defa21" "Rsad2" "Il1b" "Bcl2a1b"
## [89] "Tnfsf8" "Tubb5" "Defa22" "Gzmf"
## [93] "Mt1" "Hist1h1e" "Gzme" "Cdca8"
## [97] "Nusap1" "Apoe" "Tubb2a" "Tyrobp"
## [101] "Egr3" "Igkc" "Spp1" "AA467197"
## [105] "Dusp14" "Fabp5" "Vim" "Gadd45b"
## [109] "C2cd4b" "Nkg7" "Dppa2" "Klf2"
## [113] "Ifitm3" "Hes1" "Igha" "Lyz2"
## [117] "Cd7" "G0s2" "Fos" "Defa30"
## [121] "Fabp4" "Rgcc" "Lyz1" "Ptgs2"
## [125] "Cd83" "Ccnb2" "Hbegf" "Hopx"
## [129] "Il1r2" "Ms4a4b" "Isg15" "Actg2"
## [133] "Gzmd" "Jchain" "Rasl11a" "Gadd45g"
## [137] "Iigp1" "Hba-a1" "Gata3" "Cldn5"
## [141] "Zfp36" "Ptpn13" "Gm10827" "Cks1b"
## [145] "Dnaja1" "Klrd1" "Trbc1" "Trac"
## [149] "Ermn" "Arg1" "Klra5" "Atf3"
## [153] "Hmmr" "Ccna2" "Clspn" "AY761184"
## [157] "Lgals7" "Tyms" "Slpi" "Hilpda"
## [161] "Vcam1" "Il21" "Il1rl1" "Muc3"
## [165] "Spc24" "Gfod2" "Tuba1b" "Izumo1r"
## [169] "Dnajb1" "Plek" "Cd5" "Odc1"
## [173] "Bcl2a1a" "1700061F12Rik" "Cd3g" "Fbxo5"
## [177] "Klra1" "H2afz" "Cdca3" "Tpx2"
## [181] "Dennd5b" "Acod1" "Klra7" "Fst"
## [185] "Cd163l1" "Trdv4" "Kif11" "Igfbp4"
## [189] "Il6" "Cd24a" "Hist1h1d" "Serpinb9"
## [193] "Socs2" "Gm14851" "Esco2" "Ier3"
## [197] "Cenpe" "Serpinb6b" "Chad" "Cd52"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix
# exclude MT genes and more
#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)
DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,
DIG,
CC_gene) )
GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1
## Positive: Fcer1g, Tmem176a, Tmem176b, Gem, Itm2b, Pxdc1, Sdc4, Il22, Car2, Nrgn
## Cdkn1a, Zfp36l1, Eprs, Ckb, Prr29, Ccdc184, Klrb1b, Hk2, Klf4, Cwc25
## Odc1, Tnfsf11, Rgs1, Atf3, Arrdc4, Fam110a, Gpx1, Ccrl2, Dad1, Rgcc
## Negative: Cd3d, Cd3g, Cd2, Gramd3, Ms4a4b, Fyb, Gimap6, Cd28, Cd3e, Ms4a6b
## Ifi27l2a, Il21r, Dusp10, Lef1, Klf2, Lat, Pmepa1, Slfn2, Ccr7, Satb1
## Cd5, S1pr1, Stk4, Gimap4, Gimap1, Cd27, Coro1a, Cd247, Cd6, Sh2d1a
## PC_ 2
## Positive: Ccr7, Vegfa, Cdc14a, Hmgn3, Cd81, Bst2, S1pr1, Klf2, Igfbp4, Sdc4
## Lef1, Cx3cl1, Batf3, Ramp1, Rflnb, Cryaa, Cd74, Bmp2, Irs2, Myof
## Rgcc, Nrp1, Ttn, Cited2, Ccr6, Tgm2, Lmna, Bcl2, Ctsl, Sell
## Negative: Tmsb4x, Bcl2a1d, Cxcr6, Pfn1, Serpinb9, Lgals1, Capg, Ucp2, Icos, Serpinb6b
## Pik3r1, Cd52, Ctsw, Bcl2a1b, Serpinb1a, Actn2, Srgn, Ptprcap, Ltb4r1, S100a6
## Maf, Clic1, Slc16a6, S100a11, Ikzf2, Rgs1, Slc7a6os, Arl5c, Gzmb, Vim
## PC_ 3
## Positive: Hs3st1, Calca, Ptpn13, Il5, Gata3, Ccl1, Klrg1, Il17rb, Il13, Il1rl1
## Ipmk, Areg, Il4, Tspan13, Slc7a8, Csf2, Deptor, Otulinl, Ly6a, Hes1
## Klf5, Ctla2a, Ifitm2, Tnfrsf18, Mras, Ccr4, Spcs3, Gpx4, Rab27b, Dgat2
## Negative: Serpinb9, Lef1, Serpinb6b, Fcer1g, Car2, Klrk1, Gimap4, Serpinb1a, Pik3r1, Ms4a4b
## Upp1, Ccr7, Tubb2a, Klf2, Rflnb, S1pr1, Rbpms2, Ctsw, Slc7a6os, Gramd3
## Ckb, Klrb1b, Ms4a6b, Gatad1, Gzmc, Calm1, Slc16a6, Pcp4, Irf7, Sell
## PC_ 4
## Positive: Hs3st1, Serpinb9, Lmo4, Calca, Serpinb6b, Csf2, Ctsw, Pik3r1, Il5, Il17rb
## Ptpn13, Ccl1, Sub1, Gata3, Il13, Socs2, Ikzf2, Basp1, Nkg7, Igsf5
## Satb1, Rab4a, Upp1, Klrg1, Klrk1, Il4, Dusp10, Ncr1, Ppfia1, Epas1
## Negative: Actb, Pclaf, Tnfrsf4, S100a4, Spc24, Ctla4, Actg1, Ccr6, S100a6, Stmn1
## Vim, Lgals1, Bst2, Gpx1, Hmgn3, Ostf1, Odc1, Asf1b, Ramp1, Cd5
## Il17a, Ccna2, Cd4, Cdc14a, Racgap1, S100a10, Vegfa, Nmrk1, Tk1, Nebl
## PC_ 5
## Positive: Bcl2a1b, Ccl5, Cd3g, Ccr2, Cd3e, Hopx, Ccr5, Ctla4, Nkg7, Got1
## Plac8, Fasl, Fth1, S100a10, Sh2d2a, Cd6, Cd163l1, Actn2, Il10, Cd52
## S100a6, Odc1, Tnfrsf4, Cd3d, Cd40lg, S100a11, Il12rb2, Ltb, Gimap7, Ubald2
## Negative: Pclaf, Stmn1, Spc24, Ccna2, Asf1b, Kif4, Knl1, Tk1, Cenpm, Smc2
## Shcbp1, Racgap1, Tubb5, Spc25, Nrm, Kif15, Mcm3, Cdkn3, Cenph, Tuba1b
## Pbk, Aspm, Hmgn2, Esco2, Ska1, Ncapg2, Diaph3, Bub1b, Ptma, Dut
length(setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene)))
## [1] 1224
setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene))[1:300]
## [1] "Il17a" "Gzma" "Ccl4" "Il22" "Ccl5" "Ccl1"
## [7] "Cxcl2" "Cxcl10" "Areg" "Xcl1" "Cd74" "Penk"
## [13] "Calca" "Cxcl1" "Stmn1" "Ccl20" "S100g" "Il13"
## [19] "Il4" "Il5" "Gzmc" "H2-Aa" "Pclaf" "Cxcl3"
## [25] "H2-Eb1" "Tnfrsf4" "Il17f" "S100a6" "Gzmb" "Ifi27l2a"
## [31] "Lgals1" "Egr1" "Cxcl9" "Ifng" "Hs3st1" "Il10"
## [37] "Ccl3" "Stmn2" "Sostdc1" "Il2" "Serpine1" "Pcp4"
## [43] "Csf2" "Tph1" "H2-Ab1" "Defa24" "Plac8" "Lgals3"
## [49] "Try5" "Ctla2a" "Ifitm2" "S100a4" "Egr2" "Serpinb1a"
## [55] "Ccr7" "Ly6a" "Ifit1" "Cd36" "Ly6d" "Ifitm1"
## [61] "Ctla4" "Akap12" "Actb" "Crip1" "Bambi" "Rrad"
## [67] "Tmsb4x" "Defa21" "Rsad2" "Il1b" "Bcl2a1b" "Tnfsf8"
## [73] "Tubb5" "Defa22" "Gzmf" "Mt1" "Gzme" "Apoe"
## [79] "Tubb2a" "Tyrobp" "Egr3" "Spp1" "Dusp14" "Fabp5"
## [85] "Vim" "Gadd45b" "C2cd4b" "Nkg7" "Dppa2" "Klf2"
## [91] "Ifitm3" "Hes1" "Lyz2" "Cd7" "G0s2" "Defa30"
## [97] "Fabp4" "Rgcc" "Lyz1" "Ptgs2" "Cd83" "Hbegf"
## [103] "Hopx" "Il1r2" "Ms4a4b" "Isg15" "Actg2" "Gzmd"
## [109] "Rasl11a" "Gadd45g" "Iigp1" "Gata3" "Cldn5" "Zfp36"
## [115] "Ptpn13" "Klrd1" "Ermn" "Arg1" "Klra5" "Atf3"
## [121] "Ccna2" "Lgals7" "Slpi" "Hilpda" "Vcam1" "Il21"
## [127] "Il1rl1" "Muc3" "Spc24" "Gfod2" "Tuba1b" "Izumo1r"
## [133] "Plek" "Cd5" "Odc1" "Bcl2a1a" "Cd3g" "Fbxo5"
## [139] "Klra1" "H2afz" "Dennd5b" "Acod1" "Klra7" "Fst"
## [145] "Cd163l1" "Igfbp4" "Il6" "Cd24a" "Serpinb9" "Socs2"
## [151] "Esco2" "Ier3" "Serpinb6b" "Chad" "Cd52" "Eprs"
## [157] "Dut" "Adra2a" "Nrgn" "Olfm4" "Ccr2" "Akr1c18"
## [163] "Tent5a" "Cd3d" "Arhgef40" "Abcd2" "Aldh1a3" "Gramd3"
## [169] "Ccnd1" "Gem" "Cd2" "Id2" "Tnfsf11" "Nlrp3"
## [175] "Klra6" "Lef1" "Nkx6-2" "Prss2" "Smc2" "Tpm2"
## [181] "Bcl2a1d" "Egr4" "Arl5c" "Vegfa" "Cd28" "Lmo4"
## [187] "Upp1" "Gbp2" "Adm" "Tnfrsf9" "Dnase1l3" "Tnf"
## [193] "Sptssb" "Gcg" "Actn2" "Ccr3" "Cd70" "Pld4"
## [199] "Basp1" "Rflnb" "Ptcra" "Ptprz1" "Cebpd" "Sox4"
## [205] "Evx1os" "Palld" "Cdkn1a" "Cst8" "Coro1a" "Serpine2"
## [211] "Krt8" "Irf7" "Nmrk1" "Rnd3" "Rgs1" "Klre1"
## [217] "Mcm3" "Lmna" "S1pr1" "Gpr83" "Bst2" "Dusp2"
## [223] "Kcnq1ot1" "Aspm" "Lgals2" "Foxf1" "Hmgn2" "Pcdh10"
## [229] "Zg16" "Atp1b2" "Ccr1" "Anxa2" "Clu" "H1f0"
## [235] "Rapsn" "Emp3" "Kif4" "Timp3" "Pfn1" "Sh2d1a"
## [241] "Mmp10" "Foxp3" "Ccr5" "Klrg1" "Gla" "Msx1"
## [247] "Cd40lg" "Rgs16" "S100a10" "Serpinb2" "Krt18" "Plk2"
## [253] "Ms4a6b" "Marcksl1" "Rgs10" "Npy" "Hoxa7" "Csn3"
## [259] "Ccl7" "Foxd1" "H1fx" "Mdk" "Lgmn" "Tnfaip2"
## [265] "Runx1t1" "Tk1" "Dapl1" "Irf4" "Tulp3" "Zfp503"
## [271] "Asb4" "Zbtb32" "Asf1b" "Lig1" "Nr4a1" "Gimap7"
## [277] "Bmp2" "Cwc25" "H2afx" "Fzd8" "Pcdh18" "Peg3"
## [283] "Hebp1" "Ccrl2" "Irf8" "Ttn" "Ifit3" "Pmepa1"
## [289] "Ly6c2" "Edn1" "Il22ra2" "Klra9" "Apold1" "Serpina3f"
## [295] "Tshz2" "Elavl2" "Vgf" "Ccn2" "Ccl12" "Cth"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)
DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)
ElbowPlot(GEX.seur,ndims = 50)
PCs <- 1:27
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 10)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 24449
## Number of edges: 333437
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7863
## Number of communities: 39
## Elapsed time: 2 seconds
## 1 singletons identified. 38 final clusters.
GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 153)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 10:18:02 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:18:02 Read 24449 rows and found 27 numeric columns
## 10:18:02 Using Annoy for neighbor search, n_neighbors = 20
## 10:18:02 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:18:05 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpIPh7o5\file55d47427bdd
## 10:18:05 Searching Annoy index using 1 thread, search_k = 2000
## 10:18:11 Annoy recall = 100%
## 10:18:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 10:18:13 Initializing from normalized Laplacian + noise (using irlba)
## 10:18:14 Commencing optimization for 200 epochs, with 726526 positive edges
## 10:18:38 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "FB.info", split.by = "FB.info",
ncol = 4, cols = color.FB)
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info",
ncol = 4, cols = color.FB)
GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$|.4$","",as.character(GEX.seur$FB.info)),
levels = c("CTL",
"CKO"))
color.cnt <- color.FB[c(5,1)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
markers.mig <- c("Ptprc","Cd3d","Cd3e","Trdc",
"Tbx21","Ifng","Gata3","Il5",
"Il10","Il4","Calca","Hilpda",
"Rorc","Fcer1g","Il22","Il17a",
"Cd4","Cd8a")
FeaturePlot(GEX.seur,
features = markers.mig,
ncol = 4)
FeaturePlot(GEX.seur, features = "Rorc")
FeaturePlot(GEX.seur, features = "Il22")
DotPlot(GEX.seur, features = markers.mig, group.by = "seurat_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
DotPlot(GEX.seur, features = markers.mig, group.by = "FB.info",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
# well, this list of markers is from result of previous test analysis
markers.sort <- c("Cd3d","Cd3e","Ccr7","Lef1",
"S1pr1","Sell","Sox4","Tcrg-C2",
"Izumo1r","Cd4","Il10","Hopx",
"Foxp3","Maf","Nebl","Il17a",
"Top2a","Mki67","Pcna","Mcm6",
"Ly6a","Gzma","Cd40lg","Ifng",
"Ccl5","Klrd1","Cd7","Itgae",
"Ahr",
"Igkc","Igha","Iglc1","Jchain",
"Trdc","Trdv4","S100a6","Cd163l1",
"Pdcd1",
"Jun","Fos","Egr1","Hmgn3",
"Rorc","Ccr6","Cd74","Cd83",
"Cd81","Il17f","Prr29","Car2",
"Tmem176a","Tmem176b","Gzmc","Irf8",
"Cxcl2","Cxcl3","Gzmb","Ncr1",
"Cxcr6","Upp1","Klrb1c","Cd69",
"Apoe","Lyz2","H2-Aa","C1qb",
"Fcer1g",
"Ifit1","Isg20","Stat1","Cxcl10",
"Calca","Gata3","Il17rb","Il13",
"Il4","Hilpda","Ar","Il1rl1",
"Il5","Cd24a","Ccl1","Areg"
)
#DotPlot(GEX.seur, features = markers.sort, group.by = "sort_clusters",
DotPlot(GEX.seur, features = markers.sort, group.by = "seurat_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
if(i!=1){
sweep.res.list[[i]] <- sweep.res.list[[i-1]]
}else(
sweep.res.list[[i]] <- sweep.res.list[[i+1]]
)
}
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)
## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number
nExp_poi <- round(0.05*length(colnames(GEX.seur)))
GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good,
nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 8150 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number
nExp_poi <- round(0.1*length(colnames(GEX.seur)))
GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good,
nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 8150 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")
GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
levels = c(2,11, # naive
32, 29, #
24,
18, 13, 27,
16, 31, 33, 9, 20,
23,
12, 22,5,10,6, 35, 0,
17,4,1,3,15, 19, 14, 25, 36, 26,
7, 21,
8,28, 30,37,34))
# previous ref markers
markers.sort <- c("Cd3d","Cd3e","Ccr7","Lef1",
"S1pr1","Sell","Sox4","Tcrg-C2",
"Izumo1r","Cd4","Il10","Hopx", "Cd8a","Cd8b1",
"Foxp3","Maf","Nebl","Il17a",
"Top2a","Mki67","Pcna","Mcm6",
"Ly6a","Gzma","Cd40lg","Ifng",
"Ccl5","Klrd1","Cd7","Itgae",
"Ahr",
"Igkc","Igha","Iglc1","Jchain",
"Trdc","Trdv4","S100a6","Cd163l1",
"Pdcd1",
"Jun","Fos","Egr1","Hmgn3",
"Rorc","Ccr6","Cd74","Cd83",
"Cd81","Il17f","Prr29","Car2",
"Tmem176a","Tmem176b","Gzmc","Irf8",
"Cxcl2","Cxcl3","Gzmb","Ncr1",
"Cxcr6","Upp1","Klrb1c","Cd69",
"Apoe","Lyz2","H2-Aa","C1qb",
"Fcer1g",
"Ifit1","Isg20","Stat1","Cxcl10",
"Calca","Gata3","Il17rb","Il13",
"Il4","Hilpda","Ar","Il1rl1",
"Il5","Cd24a","Ccl1","Areg"
)
DotPlot(GEX.seur, features = markers.sort, group.by = "sort_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, group.by = "sort_clusters")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, group.by = "sort_clusters")
VlnPlot(GEX.seur, features = c("percent.rb"),
#ncol = 3,
ncol = 1,
cols = color.cnt,
pt.size = 0.1, group.by = "sort_clusters", split.by = "cnt")
VlnPlot(GEX.seur, features = c("percent.rb"),
#ncol = 3,
ncol = 1,
cols = color.cnt,
pt.size = 0, group.by = "sort_clusters", split.by = "cnt")
table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.05")])
## DoubletFinder0.05
## sort_clusters Doublet Singlet
## 2 1 1535
## 11 1 886
## 32 8 75
## 29 130 3
## 24 20 255
## 18 31 599
## 13 20 790
## 27 64 162
## 16 19 646
## 31 5 93
## 33 44 23
## 9 13 934
## 20 10 497
## 23 99 196
## 12 29 840
## 22 27 416
## 5 38 1221
## 10 38 863
## 6 12 1151
## 35 50 4
## 0 22 1980
## 17 9 649
## 4 71 1239
## 1 51 1525
## 3 19 1448
## 15 12 695
## 19 23 543
## 14 68 654
## 25 18 243
## 36 1 39
## 26 120 113
## 7 56 1087
## 21 13 434
## 8 66 995
## 28 8 197
## 30 2 125
## 37 1 16
## 34 3 56
table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.1")])
## DoubletFinder0.1
## sort_clusters Doublet Singlet
## 2 3 1533
## 11 13 874
## 32 20 63
## 29 131 2
## 24 66 209
## 18 77 553
## 13 112 698
## 27 203 23
## 16 52 613
## 31 13 85
## 33 44 23
## 9 32 915
## 20 31 476
## 23 131 164
## 12 83 786
## 22 42 401
## 5 84 1175
## 10 70 831
## 6 22 1141
## 35 51 3
## 0 48 1954
## 17 32 626
## 4 164 1146
## 1 116 1460
## 3 39 1428
## 15 38 669
## 19 55 511
## 14 186 536
## 25 36 225
## 36 3 37
## 26 173 60
## 7 113 1030
## 21 34 413
## 8 91 970
## 28 19 186
## 30 4 123
## 37 5 12
## 34 9 50
# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"
#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
# test.use = "MAST",
# #test.use = "wilcox",
# logfc.threshold = 0.25)
GEX.markers.pre <- read.table("20230927_10x_SZJ.new20240809.sort_markers.csv", header = TRUE, sep = ",")
#GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
levels = levels(GEX.seur$sort_clusters))
markers.pre_t32 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.1) %>%
top_n(n = 32, wt = avg_log2FC) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
markers.pre_t48 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
top_n(n = 48, wt = avg_log2FC) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
markers.pre_t60 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.01 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
top_n(n = 60, wt = avg_log2FC) %>%
filter(p_val_adj < 0.01) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
markers.pre_t120 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.01 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
top_n(n = 120, wt = avg_log2FC) %>%
filter(p_val_adj < 0.01) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
861/60
## [1] 14.35
861/64
## [1] 13.45312
861/65
## [1] 13.24615
pp.t60 <- list()
for(i in 1:14){
pp.t60[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t60[(64*i-63):(64*i)])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: NA
pp.t60
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
nnn = 1447
nnn/60
## [1] 24.11667
nnn/64
## [1] 22.60938
nnn/65
## [1] 22.26154
pp.t120 <- list()
for(i in 1:23){
pp.t120[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t120[(64*i-63):(64*i)])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: NA
#pp.t120
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$sort_clusters)[c(3:10),],
main = "Cell Count",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$sort_clusters)[c(3:10),]),
main = "Cell Ratio",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
VlnPlot(GEX.seur, features = c("Cd3d","Cd4","Rorc","Il17a",
"Il22","Il17f","H2-K1","H2-Q7",
"Rps19","Tnfsf11","Ahr","Gzmc"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, group.by = "sort_clusters") + NoLegend()
VlnPlot(GEX.seur, features = c("Gzmk","Gzma",
"Tbx21","Nkg7",
"Il4","Xcl1",
"Klrb1c","Cd160"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, group.by = "sort_clusters") + NoLegend()
####
##
#Cycling: C26 - ILC3.Ncr1
# C27 - Th17 (most)
#
#Mix: C29/33/35
# C23 - Bcell
# C36 - Myeloid
#
#Tcell
# Tn(Naive) C2/11 Sell+Ccr7+
# C32 Cd4-Trac-,very few Cd8a+,Pdcd1+ next to remove it
# Izumo1r+Lrig1+,very few Foxp3+ C24 (like a transit to Treg)
# let it be Treg.0
# Treg C18 Foxp3+ (Lzumo1r low) Il10+
# Th17 C13 Il17a+
# Cd40lg (hi) Ifng (hi) Fos/Egr1 (hi) C31
# Th1 C16 Gzmk+ Gzma+
#
# innate like (previous Tin )
# iNKT C9 Cd4-Tbx21+Il4+
# MAIT C20 Cd4-Cd160+Xcl1+
#
# Th2-like C34 Cd3d+Cd4+Foxp3-Gata3+
# gdT C7/21 Trdc+Trdv4+Tcrg-C1+
# Cd163l1+Blk+Mmp25+
#
#ILC2 C8/28/30
# C37 Ifit1+Isg20+Stat1+
#
#ILC3
# Ccr6+ C12/22/5/10/6
# C22 Cd74+MHCII+(H2-Aa/H2-Eb1-H2-Ab1)
# C12 Jun (hi) Fos (hi) Rorc (hi)
#
# Ccr6-Ncr1- C0
#
# Ncr1+ C17/4/1/3/15/19/14/25
# C14 Jun (hi) Fos (hi) Rorc (hi)
# C19 Cxcl9+Serpina3f+Irgm1+ Tgtp1/2+
#
##
####
GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)
GEX.seur$preAnno[GEX.seur$preAnno %in% c(2,11)] <- "Tn"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(24,18)] <- "Treg"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(13)] <- "Th17"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(27)] <- "Th.CC"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(16)] <- "Th1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(31)] <- "Th.Egr1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(9)] <- "iNKT"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(20)] <- "MAIT"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(34)] <- "Th2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(7,21)] <- "gdT"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(8,28,30)] <- "ILC2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(37)] <- "ILC2.Ifit1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(26)] <- "ILC3.CC"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(17,4,1,3,15,19,14,25)] <- "ILC3.Ncr1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(0)] <- "ILC3.nn"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(12,22,5,10,6)] <- "ILC3.Ccr6"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(29,33,35,23,36,32)] <- "Mix"
GEX.seur$preAnno <- factor(GEX.seur$preAnno,
levels = c("Tn","Treg",
"Th17","Th.CC","Th1","Th.Egr1","Th2",
"iNKT","MAIT","gdT",
"ILC2","ILC2.Ifit1",
"ILC3.CC","ILC3.Ncr1","ILC3.nn","ILC3.Ccr6",
"Mix"))
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.seur, reduction = "umap", label = T, label.size = 3.2, group.by = "preAnno")
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$preAnno)[c(3:10),],
main = "Cell Count",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$preAnno)[c(3:10),]),
main = "Cell Ratio",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, group.by = "preAnno")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, group.by = "preAnno")
GEX.pure <- subset(GEX.seur, subset = preAnno != "Mix")
GEX.pure <- subset(GEX.pure, subset = DoubletFinder0.1=="Singlet" | preAnno %in% c("Th.CC","ILC3.CC"))
#GEX.pure <- subset(GEX.pure, subset = percent.mt < 7.5 & nFeature_RNA > 500 & nFeature_RNA < 3000 & nCount_RNA < 10000)
GEX.pure
## An object of class Seurat
## 16621 features across 22088 samples within 1 assay
## Active assay: RNA (16621 features, 1224 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.pure, reduction = "umap", label = T, label.size = 3.2, group.by = "preAnno")
VlnPlot(GEX.pure, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, group.by = "preAnno")
VlnPlot(GEX.pure, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, group.by = "preAnno")
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$FB.info,
clusters=GEX.pure$preAnno)[c(3:10),],
main = "Cell Count",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$FB.info,
clusters=GEX.pure$preAnno)[c(3:10),]),
main = "Cell Ratio",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
# previous ref markers
markers.sort <- c("Cd3d","Cd3e","Ccr7","Lef1",
"S1pr1","Sell","Sox4","Tcrg-C2",
"Izumo1r","Cd4","Il10","Hopx", "Cd8a","Cd8b1",
"Foxp3","Maf","Nebl","Il17a",
"Top2a","Mki67","Pcna","Mcm6",
"Ly6a","Gzma","Cd40lg","Ifng",
"Ccl5","Klrd1","Cd7","Itgae",
"Ahr",
"Igkc","Igha","Iglc1","Jchain",
"Trdc","Trdv4","S100a6","Cd163l1",
"Pdcd1",
"Jun","Fos","Egr1","Hmgn3",
"Rorc","Ccr6","Cd74","Cd83",
"Cd81","Il17f","Prr29","Car2",
"Tmem176a","Tmem176b","Gzmc","Irf8",
"Cxcl2","Cxcl3","Gzmb","Ncr1",
"Cxcr6","Upp1","Klrb1c","Cd69",
"Apoe","Lyz2","H2-Aa","C1qb",
"Fcer1g",
"Ifit1","Isg20","Stat1","Cxcl10",
"Calca","Gata3","Il17rb","Il13",
"Il4","Hilpda","Ar","Il1rl1",
"Il5","Cd24a","Ccl1","Areg"
)
markers.new <- c("Cd3d","Cd3e","Cd4","Cd8a",
"Klf2","Ccr7","Lef1","S1pr1",
"Tubb2a","Rflnb","Igfbp4","Sell",
"Itm2a","Itga6","Smc4",#"Sox4","Pdcd1",
"Ptprc",#"Fam241a","Slamf6",
"Izumo1r","Tbc1d4",
"Ctla4","Il10",
"Hopx","Foxp3","Ccr2","Maf",
"Capg","Slamf1","Icos",
"Il17a","Nebl","S100a11","S100a10",
"Top2a","Mki67","Pcna","Mcm6",
"Fasl","Gzma","Gzmk","Ccr5",
"Il12rb2",
"Jun","Fos","Ier2","Egr1",
"Ifng", "Tnf",
"Tbx21",
"Klrb1c","Xcl1","Nkg7","Plac8","Cd160",
"Ccl5",
"Klrd1","Cd7","Itgae",
"Tcrg-C1","Trdv4","Cd163l1","Blk","Mmp25",
"Calca","Gata3","Il17rb","Il13",
"Csf2","Dgat2",
"Il4","Hilpda","Ar","Il1rl1",
"Il5",#"Cd24a",
"Ccl1","Areg","Ahr",
"Cxcl10",
"Gbp6","Stat1","Isg15","Ifit1",
"Ncr1","Gzmc","Gzmb","Irf8",
"Cxcr6","Irf7","Slc16a6","Klrk1",
"Tmem176b","Fcer1g","Car2","Ckb",
"Cd69","Jaml","Cxcl9",
"Il22","Gem","Pxdc1","Cdc14a",
"Sdc4","Vegfa","Bst2","Nmrk1",
"Hmgn3","Cd81","Cx3cl1",
"Cd74","H2-Aa","H2-Eb1","H2-Ab1",
"Ccr6","Batf3","Rorc","Maff",
"Icam1","Il18r1",
"Il17f"
)
DotPlot(GEX.pure, features = markers.new, group.by = "preAnno",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 9)) +
scale_y_discrete(limits=rev)
DotPlot(GEX.seur, features = markers.new, group.by = "preAnno",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 9)) +
scale_y_discrete(limits=rev)
#saveRDS(GEX.seur, "./20230927_10x_SZJ.new20270809.preAnno.rds")
VlnPlot(GEX.pure, features = c("Cd3d","Cd4","Rorc","Il17a",
"Tbx21","Xcl1","Cd160","Cd7",
"Klrb1c","Ifng","Mr1","Cd1d1"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, group.by = "preAnno") + NoLegend()
VlnPlot(GEX.pure, features = c("Cd160","Plac8","Klrd1","Itgae",
"Tcrg-C1","Trdv4","Cd163l1","Blk",
"Gzma","Gzmk","Ccr5","Ccl5"),
#ncol = 3,
ncol = 2,
#cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, group.by = "preAnno") + NoLegend()
GEX.pure$rep <- gsub("CKO.|CTL.","rep",as.character(GEX.pure$FB.info))
GEX.pure$rep[1:5]
## AAACCCAAGAACAGGA-1 AAACCCAAGAATTGCA-1 AAACCCAAGCAGCCCT-1 AAACCCAAGCCTGAAG-1
## "rep2" "rep3" "rep4" "rep1"
## AAACCCAAGCGCATCC-1
## "rep2"
GEX.pure$preAnno <- factor(as.character(GEX.pure$preAnno),
levels = setdiff(levels(GEX.seur$preAnno),"Mix"))
stat_preAnno <- GEX.pure@meta.data[,c("cnt","rep","preAnno")]
stat_preAnno.s <- stat_preAnno %>%
group_by(cnt,rep,preAnno) %>%
summarise(count=n()) %>%
mutate(prop= count/sum(count)) %>%
ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno <- stat_preAnno.s %>%
ggplot(aes(x = preAnno, y = 100*prop, color=cnt)) +
geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
theme_classic(base_size = 15) +
scale_color_manual(values = c("skyblue","pink"), name="") +
labs(title="Cell Composition, preAnno", y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
geom_point(aes(x=preAnno, y = 100*prop, color= cnt),
position = position_dodge(0.75),
shape=16,alpha=0.75,size=2.15,
stroke=0.15, show.legend=F)
pcom.preAnno
glm.nb - anova.LRT
Sort.N <- c("CTL","CKO")
glm.nb_anova.preAnno <- list()
for(x1 in 1:2){
for(x2 in 1:2){
if(x2>x1){
stat_preAnno.s_N <- stat_preAnno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
total.N <- stat_preAnno.s_N %>%
group_by(cnt, rep) %>%
summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
rownames(total.N) <- paste0(as.character(total.N$cnt),
"_",
as.character(total.N$rep))
stat_preAnno.s_N$total <- total.N[paste0(stat_preAnno.s_N$cnt,
"_",
stat_preAnno.s_N$rep),"total"]
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
for(AA in levels(stat_preAnno.s_N$preAnno)){
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error = function(e){
NA
})
})
})
}
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]])
}
}
}
glm.nb_anova.preAnno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno)))
rownames(glm.nb_anova.preAnno_df) <- names(glm.nb_anova.preAnno)
colnames(glm.nb_anova.preAnno_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno_df))
glm.nb_anova.preAnno_df
## Tn Treg Th17 Th.CC Th1 Th.Egr1
## CTLvsCKO 0.4031199 0.01076891 0.8867657 0.6924516 0.8183861 0.4245076
## Th2 iNKT MAIT gdT ILC2 ILC2.Ifit1
## CTLvsCKO 0.03566346 0.9218254 0.4287892 0.01213637 0.6876901 0.9914385
## ILC3.CC ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.01948181 0.6554751 0.5065082 0.9362884
round(glm.nb_anova.preAnno_df,4)
## Tn Treg Th17 Th.CC Th1 Th.Egr1 Th2 iNKT MAIT gdT
## CTLvsCKO 0.4031 0.0108 0.8868 0.6925 0.8184 0.4245 0.0357 0.9218 0.4288 0.0121
## ILC2 ILC2.Ifit1 ILC3.CC ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.6877 0.9914 0.0195 0.6555 0.5065 0.9363
Idents(GEX.pure) <- "cnt"
list_sort <- list()
for(nn in levels(GEX.pure$preAnno)){
list_sort[[nn]] <- c(nn)
}
names_sort <- names(list_sort)
data.frame(list_sort)
## Tn Treg Th17 Th.CC Th1 Th.Egr1 Th2 iNKT MAIT gdT ILC2 ILC2.Ifit1 ILC3.CC
## 1 Tn Treg Th17 Th.CC Th1 Th.Egr1 Th2 iNKT MAIT gdT ILC2 ILC2.Ifit1 ILC3.CC
## ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## 1 ILC3.Ncr1 ILC3.nn ILC3.Ccr6
GEX.DEGs_sort <- list()
for(nn in names_sort){
GEX.DEGs_sort[[nn]] <- FindAllMarkers(subset(GEX.pure, subset = preAnno %in% list_sort[[nn]]),
assay = "RNA",
only.pos = T,
min.pct = 0.05,
logfc.threshold = 0.1,
test.use = "MAST")
}
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CTL
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
## Calculating cluster CKO
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
lapply(GEX.DEGs_sort, dim)
## $Tn
## [1] 86 7
##
## $Treg
## [1] 77 7
##
## $Th17
## [1] 51 7
##
## $Th.CC
## [1] 70 7
##
## $Th1
## [1] 72 7
##
## $Th.Egr1
## [1] 43 7
##
## $Th2
## [1] 47 7
##
## $iNKT
## [1] 92 7
##
## $MAIT
## [1] 67 7
##
## $gdT
## [1] 101 7
##
## $ILC2
## [1] 82 7
##
## $ILC2.Ifit1
## [1] 12 7
##
## $ILC3.CC
## [1] 99 7
##
## $ILC3.Ncr1
## [1] 192 7
##
## $ILC3.nn
## [1] 86 7
##
## $ILC3.Ccr6
## [1] 120 7
#GEX.DEGs_sort
# save DEGs' table
df.DEGs_sort <- data.frame()
for(nn in names_sort){
df.DEGs_sort <- rbind(df.DEGs_sort, data.frame(GEX.DEGs_sort[[nn]],preAnno=nn))
}
#write.csv(df.DEGs_sort, "20230927_10x_SZJ.new20270809.DEGs.CTLvsCKO.preAnno.csv")
df.DEGs_sort$preAnno <- factor(df.DEGs_sort$preAnno,
levels = levels(GEX.pure$preAnno))
cut.padj = 0.05
cut.log2FC = 0.1
cut.pct1 = 0.05
df.DEGs_sort %>% filter(p_val_adj < cut.padj &
abs(avg_log2FC) > cut.log2FC &
pct.1 > cut.pct1) %>%
group_by(preAnno,cluster) %>%
summarise(up.DEGs = n()) %>% as.data.frame()
## preAnno cluster up.DEGs
## 1 Tn CKO 4
## 2 Th.CC CKO 1
## 3 Th1 CTL 1
## 4 iNKT CKO 1
## 5 gdT CKO 4
## 6 ILC2 CKO 2
## 7 ILC3.Ncr1 CTL 20
## 8 ILC3.Ncr1 CKO 20
## 9 ILC3.nn CTL 3
## 10 ILC3.nn CKO 1
## 11 ILC3.Ccr6 CTL 3
## 12 ILC3.Ccr6 CKO 5
df.DEGs_sort %>% filter(p_val_adj < cut.padj &
abs(avg_log2FC) > cut.log2FC &
pct.1 > cut.pct1) %>%
group_by(preAnno,cluster) %>%
summarise(up.DEGs = n()) %>% as.data.frame() %>%
ggplot(aes(x=preAnno, y=up.DEGs, color = cluster)) +
geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
theme_classic(base_size = 15) +
scale_color_manual(values = c("skyblue","pink"), name="") +
labs(title=paste0("up.DEGs stat, pct.1>0.1, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
title =element_text(size=12, face='bold'))
pp.DEGs.sort <- lapply(list_sort,function(x){NA})
GEX.DEGs_sort.combine <- GEX.DEGs_sort
GEX.DEGs_sort.combine <- lapply(GEX.DEGs_sort.combine, function(x){
x[x$cluster=="CTL","avg_log2FC"] <- -x[x$cluster=="CTL","avg_log2FC"]
x
})
pp.DEGs.sort$Tn <- GEX.DEGs_sort.combine$Tn %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="Tn CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Tn
pp.DEGs.sort$Th.CC <- GEX.DEGs_sort.combine$Th.CC %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="Th.CC CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Th.CC
pp.DEGs.sort$Th1 <- GEX.DEGs_sort.combine$Th1 %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="Th1 CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Th1
pp.DEGs.sort$iNKT <- GEX.DEGs_sort.combine$iNKT %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="iNKT CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$iNKT
pp.DEGs.sort$gdT <- GEX.DEGs_sort.combine$gdT %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="gdT CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$gdT
pp.DEGs.sort$ILC2 <- GEX.DEGs_sort.combine$ILC2 %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="ILC2 CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC2
pp.DEGs.sort$ILC3.Ncr1 <- GEX.DEGs_sort.combine$ILC3.Ncr1 %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="ILC3.Ncr1 CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC3.Ncr1
pp.DEGs.sort$ILC3.nn <- GEX.DEGs_sort.combine$ILC3.nn %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="ILC3.nn CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC3.nn
pp.DEGs.sort$ILC3.Ccr6 <- GEX.DEGs_sort.combine$ILC3.Ccr6 %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="ILC3.Ccr6 CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$ILC3.Ccr6
pp.DEGs.sort$Treg <- GEX.DEGs_sort.combine$Treg %>%
mutate(label=ifelse(((p_val_adj < 1e-2 & avg_log2FC<0) | (p_val_adj < 1e-2 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.2)) &
#!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
!grepl("Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
geom_point(shape=21, alpha=0.65) +
geom_text_repel(size=2.5, max.overlaps = 500) +
scale_fill_manual(values = c("CTL"="Skyblue",
"CKO"="pink",
"None"="grey")) +
theme_classic() + labs(title="Treg CTL vs CKO") +
guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-0.85,0.85))
pp.DEGs.sort$Treg
GEX.sub <- subset(GEX.pure, subset = preAnno %in% c("ILC2","ILC2.Ifit1",
"ILC3.CC","ILC3.Ncr1","ILC3.nn","ILC3.Ccr6"))
GEX.sub
## An object of class Seurat
## 16621 features across 14413 samples within 1 assay
## Active assay: RNA (16621 features, 1224 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
GEX.sub$preAnno <- as.character(GEX.sub$preAnno)
GEX.sub$preAnno[GEX.sub$preAnno %in% c("ILC2.Ifit1")] <- "ILC2"
GEX.sub$preAnno <- factor(GEX.sub$preAnno,
levels = c("ILC2",
"ILC3.CC","ILC3.Ncr1","ILC3.nn","ILC3.Ccr6"))
DimPlot(GEX.sub, reduction = "umap", label = T, group.by = "preAnno") +
DimPlot(GEX.sub, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.sub$FB.info,
clusters=GEX.sub$preAnno)[c(3:10),],
main = "Cell Count",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.sub$FB.info,
clusters=GEX.sub$preAnno)[c(3:10),]),
main = "Cell Ratio",
gaps_row = c(4),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
stat_preAnno <- GEX.sub@meta.data[,c("cnt","rep","preAnno")]
stat_preAnno.s <- stat_preAnno %>%
group_by(cnt,rep,preAnno) %>%
summarise(count=n()) %>%
mutate(prop= count/sum(count)) %>%
ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno <- stat_preAnno.s %>%
ggplot(aes(x = preAnno, y = 100*prop, color=cnt)) +
geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
theme_classic(base_size = 15) +
scale_color_manual(values = c("skyblue","pink"), name="") +
labs(title="Cell Composition, preAnno", y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
geom_point(aes(x=preAnno, y = 100*prop, color= cnt),
position = position_dodge(0.75),
shape=16,alpha=0.75,size=2.15,
stroke=0.15, show.legend=F)
pcom.preAnno
glm.nb - anova.LRT
Sort.N <- c("CTL","CKO")
glm.nb_anova.preAnno <- list()
for(x1 in 1:2){
for(x2 in 1:2){
if(x2>x1){
stat_preAnno.s_N <- stat_preAnno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
total.N <- stat_preAnno.s_N %>%
group_by(cnt, rep) %>%
summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
rownames(total.N) <- paste0(as.character(total.N$cnt),
"_",
as.character(total.N$rep))
stat_preAnno.s_N$total <- total.N[paste0(stat_preAnno.s_N$cnt,
"_",
stat_preAnno.s_N$rep),"total"]
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
for(AA in levels(stat_preAnno.s_N$preAnno)){
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error = function(e){
NA
})
})
})
}
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]])
}
}
}
glm.nb_anova.preAnno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno)))
rownames(glm.nb_anova.preAnno_df) <- names(glm.nb_anova.preAnno)
colnames(glm.nb_anova.preAnno_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno_df))
glm.nb_anova.preAnno_df
## ILC2 ILC3.CC ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.5416787 0.01492285 0.4265305 0.2850411 0.9590854
round(glm.nb_anova.preAnno_df,5)
## ILC2 ILC3.CC ILC3.Ncr1 ILC3.nn ILC3.Ccr6
## CTLvsCKO 0.54168 0.01492 0.42653 0.28504 0.95909